#' Cluster-Adjusted Confidence Intervals And p-Values For GLM
#'
#' Computes p-values and confidence intervals for GLM models based on cluster-specific model estimation (Ibragimov and Muller 2010). A separate model is estimated in each cluster, and then p-values and confidence intervals are computed based on a t/normal distribution of the cluster-specific estimates.
#'
#' @param mod A model estimated using \code{glm}.
#' @param dat The data set used to estimate \code{mod}.
#' @param cluster A formula of the clustering variable.
#' @param ci.level What confidence level should CIs reflect?
#' @param report Should a table of results be printed to the console?
#' @param drop Should clusters within which a model cannot be estimated be dropped?
#' @param truncate Should outlying cluster-specific beta estimates be excluded?
#' @param return.vcv Should a VCV matrix and the means of cluster-specific coefficient estimates be returned?
#'
#' @return A list with the elements
#' \item{p.values}{A matrix of the estimated p-values.}
#' \item{ci}{A matrix of confidence intervals.}
#' \item{vcv.hat}{Optional: A cluster-level variance-covariance matrix for coefficient estimates.}
#' \item{beta.bar}{Optional: A vector of means for cluster-specific coefficient estimates.}
#' @author Justin Esarey
#' @note Confidence intervals are centered on the cluster averaged estimate, which can diverge from original model estimates under several circumstances (e.g., if clusters have different numbers of observations). Consequently, confidence intervals may not be centered on original model estimates. If drop = TRUE, any cluster for which all coefficients cannot be estimated will be automatically dropped from the analysis. If truncate = TRUE, any cluster for which any coefficient is more than 6 times the interquartile range from the cross-cluster mean will also be dropped as an outlier.
#' @examples
#' \dontrun{
#' 
#' #####################################################################
#' # example one: predict whether respondent has a university degree
#' #####################################################################
#' 
#' require(effects)
#' data(WVS)
#' logit.model <- glm(degree ~ religion + gender + age, data=WVS, family=binomial(link="logit"))
#' summary(logit.model)
#' 
#' # compute cluster-adjusted p-values
#' clust.im.p <- cluster.im.glm(logit.model, WVS, ~ country, report = T)
#' 
#' ############################################################################
#' # example two: linear model of whether respondent has a university degree
#' #              with interaction between gender and age + country FEs
#' ############################################################################
#' 
#' WVS$degree.n <- as.numeric(WVS$degree)
#' WVS$gender.n <- as.numeric(WVS$gender)
#' WVS$genderXage <- WVS$gender.n * WVS$age
#' lin.model <- glm(degree.n ~ gender.n + age + genderXage + religion + as.factor(country), data=WVS)
#' 
#' # compute marginal effect of male gender on probability of obtaining a university degree
#' # using conventional standard errors
#' age.vec <- seq(from=18, to=90, by=1)
#' me.age <- coefficients(lin.model)[2] + coefficients(lin.model)[4]*age.vec
#' plot(me.age ~ age.vec, type="l", ylim=c(-0.1, 0.1), xlab="age", 
#'      ylab="ME of male gender on Pr(university degree)")
#' se.age <- sqrt( vcov(lin.model)[2,2] + vcov(lin.model)[4,4]*(age.vec)^2 + 
#'                 2*vcov(lin.model)[2,4]*age.vec)
#' ci.h <- me.age + qt(0.975, lower.tail=T, df=lin.model$df.residual) * se.age
#' ci.l <- me.age - qt(0.975, lower.tail=T, df=lin.model$df.residual) * se.age
#' lines(ci.h ~ age.vec, lty=2)
#' lines(ci.l ~ age.vec, lty=2)
#' 
#' 
#' # cluster on country, compute CIs for marginal effect of gender on degree attainment
#' # drop the FEs (absorbed into cluster-level coefficients)
#' lin.model.n <- glm(degree.n ~ gender.n + age + genderXage + religion, data=WVS)
#' clust.im.result <- cluster.im.glm(lin.model.n, WVS, ~ country, report = T, return.vcv = T)
#' # compute ME using average of cluster-level estimates (CIs center on this)
#' me.age.im <- clust.im.result$beta.bar[2] + clust.im.result$beta.bar[4]*age.vec
#' se.age.im <- sqrt( clust.im.result$vcv[2,2] + clust.im.result$vcv[4,4]*(age.vec)^2 +
#'                    2*clust.im.result$vcv[2,4]*age.vec)
#' # center the CIs on the ME using average of cluster-level estimates
#' # important: divide by sqrt(G) to convert SE of cluster-level estimates 
#' #            into SE of the mean, where G = number of clusters
#' G <- length(unique(WVS$country))
#' ci.h.im <- me.age.im + qt(0.975, lower.tail=T, df=(G-1)) * se.age.im/sqrt(G)
#' ci.l.im <- me.age.im - qt(0.975, lower.tail=T, df=(G-1)) * se.age.im/sqrt(G)
#' plot(me.age.im ~ age.vec, type="l", ylim=c(-0.2, 0.2), xlab="age", 
#'      ylab="ME of male gender on Pr(university degree)")
#' lines(ci.h.im ~ age.vec, lty=2)
#' lines(ci.l.im ~ age.vec, lty=2)
#' # for comparison, here's the ME estimate and CIs from the baseline model
#' lines(me.age ~ age.vec, lty=1, col="gray")
#' lines(ci.h ~ age.vec, lty=3, col="gray")
#' lines(ci.l ~ age.vec, lty=3, col="gray")
#' 
#' }
#' @rdname cluster.im.glm
#' @references Esarey, Justin, and Andrew Menger. 2017. "Practical and Effective Approaches to Dealing with Clustered Data." \emph{Political Science Research and Methods} forthcoming: 1-35. <URL:http://jee3.web.rice.edu/cluster-paper.pdf>.
#' @references Ibragimov, Rustam, and Ulrich K. Muller. 2010. "t-Statistic Based Correlation and Heterogeneity Robust Inference." \emph{Journal of Business & Economic Statistics} 28(4): 453-468. <DOI:10.1198/jbes.2009.08046>.
#' @import stats
#' @importFrom utils write.table
#' @export

cluster.im.glm<-function(mod, dat, cluster, ci.level = 0.95, report = TRUE, drop = FALSE, truncate = FALSE, return.vcv = FALSE){
  
  
  form <- mod$formula                                                   # what is the formula of this model?
  variables <- all.vars(form)                                           # what variables are in this model?
  clust.name <- all.vars(cluster)                                       # what is the name of the clustering variable?
  used.idx <- which(rownames(dat) %in% rownames(mod$model))             # what were the actively used observations in the model?
  dat <- dat[used.idx,]                                                 # keep only active observations
  clust <- as.vector(unlist(dat[[clust.name]]))                         # store cluster index in convenient vector
  G<-length(unique(clust))                                              # how many clusters are in this model?
  ind.variables.full <- names(coefficients(mod))                        # what independent variables are in this model?
  ind.variables <- rownames(summary(mod)$coefficients)                  # what non-dropped independent variables in this model?
  
  b.clust <- matrix(data = NA, nrow = G, ncol = length(ind.variables))  # a matrix to store the betas
  n.clust <- c() 
  
  G.o <- G
  for(i in 1:G){
     
    clust.ind <- which(clust == unique(clust)[i])                                             # select obs in cluster i
    
    clust.dat <- dat[clust.ind,]                                                              # create the cluster i data set
    clust.mod <- suppressWarnings(tryCatch(glm(form, data = clust.dat, family = mod$family),  # run cluster model
                                           error = function(e){return(NULL)}))   
    
    if(is.null(clust.mod) == FALSE ){
      if(clust.mod$converged == 0){clust.mod <- NULL}                  # judge GLM as failure if convergence not achieved
    }
    fail <- is.null(clust.mod)                                         # determine whether the GLM process created an error
    
    
    # should we stop if one cluster-specific model does not converge?
    if(drop==FALSE){
      if(fail == T){stop("cluster-specific model returned error (try drop = TRUE)", call.=FALSE)}
      
      # detect whether variables were dropped in individual clusters
      if(length(rownames(summary(clust.mod)$coefficients)) != length(ind.variables)){
        stop("cluster-specific model(s) dropped variables; ensure that all variables vary within clusters", call.=FALSE)
      }
      
      b.clust[i,] <- coefficients(clust.mod)[ind.variables]                                    # store the cluster i beta coefficient
      
    }else{
      if(fail == F){
        # detect whether variables were dropped in individual clusters
        if(length(rownames(summary(clust.mod)$coefficients)) != length(ind.variables)){
          stop("cluster-specific model(s) dropped variables; ensure that all variables vary within clusters", call.=FALSE)
        }
        
        b.clust[i,] <- coefficients(clust.mod)[ind.variables]                                  # store the cluster i beta coefficient
      }else{
        b.clust[i,] <- NA
      }
    }
  
  }

  if(drop==TRUE){
    dropped.nc <- 0                                                             # store number of non-converged clusters
    b.clust <- na.omit(b.clust)
    dropped.nc <- length(attr(b.clust, "na.action"))                            # record number of models dropped
    G.t <- dim(b.clust)[1]
    if(G.t == 0){stop("all clusters were dropped (see help file).")}
  }
  
  # remove clusters with outlying betas
  dropped <- 0                                                                 # store number of outlying estimates
  if(truncate==TRUE){                                                          # if drop outlying betas...
    
    IQR <- apply(FUN=quantile, MARGIN=2, X=b.clust, probs=c(0.25, 0.75))       # calculate inter-quartile range
    
    b.clust.save <- b.clust                                                    # non-outlying beta identifier
    for(i in 1:dim(b.clust)[2]){
      b.clust.save[,i] <- ifelse( abs(b.clust[,i]) >                           # determine which estimates to save
            (abs(mean(b.clust[,i])) + 6*abs(IQR[2,i] - IQR[1,i])), 0, 1)
    }
    
    save.clust <- apply(X=b.clust.save, MARGIN=1, FUN=min)                     # which rows had outlying betas
    dropped <- dim(b.clust)[1] - sum(save.clust)                               # how many clusters dropped?
    
    b.clust.adj <- cbind(b.clust, save.clust)                                  # append the outlier ID to beta matrix
    
    b.clust <- subset(b.clust,                                                 # drop the outlying betas
            subset=(save.clust==1), select=1:dim(b.clust)[2])
    
  }
  
  G <- dim(b.clust)[1]
  if(G == 0){stop("all clusters were dropped (see help file).")}
  
  
  b.hat <- colMeans(b.clust)                                # calculate the avg beta across clusters
  b.dev <- sweep(b.clust, MARGIN = 2, STATS = b.hat)        # sweep out the avg betas
  #s.hat <- sqrt( (1 / (G-1)) * colSums(b.dev^2) )          # manually calculate the SE of beta estimates across clusters (deprecated)
  vcv.hat <- cov(b.dev)                                     # calculate VCV matrix
  rownames(vcv.hat) <- ind.variables
  colnames(vcv.hat) <- ind.variables
  s.hat <- sqrt(diag(vcv.hat))                              # calculate standard error

  t.hat <- sqrt(G) * (b.hat / s.hat)                        # calculate t-statistic
  
  names(b.hat) <- ind.variables
  
  # compute p-val based on # of clusters
  p.out <- 2*pmin( pt(t.hat, df = G-1, lower.tail = TRUE), pt(t.hat, df = G-1, lower.tail = FALSE) )
  
  
  # compute CIs
  ci.lo <- b.hat - qt((1-ci.level)/2, df=(G-1), lower.tail=FALSE)*(s.hat/sqrt(G))
  ci.hi <- b.hat + qt((1-ci.level)/2, df=(G-1), lower.tail=FALSE)*(s.hat/sqrt(G))
  
  out <- matrix(p.out, ncol=1)
  rownames(out) <- ind.variables

  out.p <- cbind( ind.variables, round(out, 3))
  out.p <- rbind(c("variable name", "cluster-adjusted p-value"), out.p)
  
  out.ci <- cbind(ci.lo, ci.hi)
  rownames(out.ci) <- ind.variables
  colnames(out.ci) <- c("CI lower", "CI higher")
  
  print.ci <- cbind(ind.variables, ci.lo, ci.hi)
  print.ci <- rbind(c("variable name", "CI lower", "CI higher"), print.ci)
  

  printmat <- function(m){
    write.table(format(m, justify="right"), row.names=F, col.names=F, quote=F, sep = "   ")
  }
  
  if(report==T){
    cat("\n", "Cluster-Adjusted p-values: ", "\n", "\n")
    printmat(out.p)
    
    cat("\n", "Confidence Intervals (centered on cluster-averaged results):", "\n", "\n")
    printmat(print.ci)
        
    if(G.o > G){
      cat("\n", "Note:", G.o - G, "clusters were dropped (see help file).", "\n", "\n")
    }
    
    if(length(ind.variables) < length(ind.variables.full)){
      cat("\n", "\n", "****", "Note: ", length(ind.variables.full) - length(ind.variables), " variables were unidentified in the model and are not reported.", "****", "\n", sep="")
      cat("Variables not reported:", "\n", sep="")
      cat(ind.variables.full[!ind.variables.full %in% ind.variables], sep=", ")
      cat("\n", "\n")
    }
    
  }
 
  
  out.list<-list()
  out.list[["p.values"]] <- out
  out.list[["ci"]] <- out.ci
  if(return.vcv == TRUE){out.list[["vcv.hat"]] <- vcv.hat}
  if(return.vcv == TRUE){out.list[["beta.bar"]] <- b.hat}
  return(invisible(out.list))
  
}

